VSURF

Author

Miguel Fudolig

library(tidyverse)
library(ggplot2)
library(lavaan)
library(car)
library(glmnet)
library(randomForestSRC)
library(caret)
library(ggRandomForests)
library(VSURF)

Data set

This data set is from the 2015 Asian American Quality of Life survey. Participants are from Austin, Texas.

Input data set

# Read the AAQoL survey export and prepare the factors used in the analysis.
qol <- read_csv("AAQoL.csv") |>
  # Character columns become factors so that relevel() can be applied below.
  mutate(across(where(is.character), as.factor)) |>
  # Reference levels for the main categorical predictors.
  mutate(
    `English Difficulties` = relevel(`English Difficulties`, ref = "Not at all"),
    `English Speaking` = relevel(`English Speaking`, ref = "Not at all"),
    Ethnicity = relevel(Ethnicity, ref = "Chinese"),
    Religion = relevel(Religion, ref = "None")
  ) |>
  # Collapse the income bands into a two-level Below/Above factor. Values that
  # are not one of the listed bands fall through `.default` and become NA once
  # the levels are restricted to "Below" and "Above".
  mutate(
    Income_median = factor(
      case_match(
        Income,
        c(
          "$0 - $9,999",
          "$10,000 - $19,999",
          "$20,000 - $29,999",
          "$30,000 - $39,999",
          "$40,000 - $49,999",
          "$50,000 - $59,999"
        ) ~ "Below",
        c(
          "$60,000 - $69,999",
          "$70,000 and over"
        ) ~ "Above",
        .default = Income
      ),
      levels = c("Below", "Above")
    )
  ) |>
  # Agreement-scale items use "Strongly disagree" as the reference level.
  mutate(
    across(
      c(
        `Family Respect`:`Togetherness`,
        `Close-knit Community`:`Community Trust`
      ),
      \(item) relevel(item, ref = "Strongly disagree")
    )
  ) |>
  # Treat the remaining (non-character) item columns as categorical as well.
  mutate(
    across(
      c(`See Family`:`Helpful Friends`, `Discrimination`),
      as.factor
    )
  )
New names:
Rows: 2609 Columns: 231
── Column specification
──────────────────────────────────────────────────────── Delimiter: "," chr
(190): Gender, Ethnicity, Marital Status, No One, Spouse, Children, Gran... dbl
(41): Survey ID, Age, Education Completed, Household Size, Grandparent,...
ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
Specify the column types or set `show_col_types = FALSE` to quiet this message.
• `Other` -> `Other...17`
• `Other` -> `Other...89`
# Interactive table of the prepared data set.
DT::datatable(qol)
Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html

Family

# Modelling frame for the `Family` outcome: keep respondents who answered
# "No" or "Yes", drop the unused response levels, keep the candidate
# predictors, and remove every row that has a missing value.
rfdata <- qol |>
  filter(Family %in% c("No", "Yes")) |>
  mutate(Family = droplevels(Family)) |>
  select(
    Family, Ethnicity, Age, Gender, Religion, `Full Time Employment`,
    Income_median, `English Speaking`, `English Difficulties`,
    `See Family`:`Community Trust`, `Health Insurance`, `Dental Insurance`,
    `Discrimination`
  ) |>
  na.omit() |>
  as.data.frame() |>
  # Syntactically valid column names so every column can appear in a formula.
  rename_with(make.names)

# ROSE-generated data set (fixed seed) on which variable selection is run.
imbal <- ROSE::ROSE(Family ~ ., data = rfdata, seed = 3)$data


# VSURF variable selection on the ROSE data. TRUE/FALSE are spelled out
# because T and F are ordinary, reassignable variables.
vsurf.mod <- VSURF(
  Family ~ ., imbal,
  na.action = "na.omit", parallel = TRUE, verbose = FALSE
)
Warning in VSURF.formula(Family ~ ., imbal, na.action = "na.omit", parallel = T, : VSURF with a formula-type call outputs selected variables
which are indices of the input matrix based on the formula:
you may reorder these to get indices of the original data
# Timing and number of variables kept at each VSURF step.
summary(vsurf.mod)

 VSURF computation time: 1 mins 

 VSURF selected: 
    34 variables at thresholding step (in 8.5 secs)
    29 variables at interpretation step (in 7.3 secs)
    14 variables at prediction step (in 46.6 secs)

 VSURF ran in parallel on a PSOCK cluster and used 15 cores 
# Predictors kept at the prediction step. The VSURF indices count predictors
# only, so the response (first column) is dropped before indexing.
names(rfdata)[-1][vsurf.mod$varselect.pred]
 [1] "Ethnicity"            "Age"                  "See.Friends"         
 [4] "Close.Family"         "English.Difficulties" "See.Family"          
 [7] "Religion"             "Helpful.Family"       "Helpful.Friends"     
[10] "Helpful.Community"    "Full.Time.Employment" "Community.Trust"     
[13] "English.Speaking"     "Successful.Family"   
# Predictors kept at the interpretation step (response column excluded before
# indexing, as the VSURF indices count predictors only).
names(rfdata)[-1][vsurf.mod$varselect.interp]
 [1] "Ethnicity"               "Age"                    
 [3] "See.Friends"             "Close.Family"           
 [5] "English.Difficulties"    "See.Family"             
 [7] "Religion"                "Helpful.Family"         
 [9] "Helpful.Friends"         "Helpful.Community"      
[11] "Close.Friends"           "Full.Time.Employment"   
[13] "Community.Trust"         "Get.Along"              
[15] "Religious.Importance"    "English.Speaking"       
[17] "Community.Shares.Values" "Expression"             
[19] "Religious.Attendance"    "Close.knit.Community"   
[21] "Family.Respect"          "Successful.Family"      
[23] "Discrimination"          "Feel.Close"             
[25] "Similar.Values"          "Spend.Time.Together"    
[27] "Togetherness"            "Trust"                  
[29] "Loyalty"                
# Diagnostic plots for the VSURF fit.
plot(x = vsurf.mod)

# `mean.perf` component of the fit (presumably the mean OOB error of forests
# grown on all variables; confirm against the VSURF documentation).
vsurf.mod[["mean.perf"]]
[1] 0.1639408

Importance

# Importance table: one row per predictor with its mean importance and the
# standard deviation of that importance, in the order given by
# `imp.mean.dec.ind`.
vi <- mutate(
  data.frame(
    Variable = names(rfdata)[-1][vsurf.mod$imp.mean.dec.ind],
    Importance = vsurf.mod$imp.mean.dec,
    sd_Importance = vsurf.mod$imp.sd.dec
  ),
  # Bar colour: Ethnicity is highlighted in red, everything else is black.
  fill = if_else(Variable == "Ethnicity", "red", "black", missing = "black")
)

# Print the table with both numeric columns rounded to five decimals.
mutate(vi, across(c(Importance, sd_Importance), \(col) round(col, 5)))
                  Variable Importance sd_Importance  fill
1                Ethnicity    0.06500       0.00138   red
2                      Age    0.05911       0.00097 black
3              See.Friends    0.04469       0.00107 black
4             Close.Family    0.03984       0.00089 black
5     English.Difficulties    0.03830       0.00091 black
6               See.Family    0.03650       0.00090 black
7                 Religion    0.03581       0.00054 black
8           Helpful.Family    0.03328       0.00070 black
9          Helpful.Friends    0.03266       0.00065 black
10       Helpful.Community    0.03202       0.00114 black
11           Close.Friends    0.02930       0.00075 black
12    Full.Time.Employment    0.02876       0.00130 black
13         Community.Trust    0.02418       0.00092 black
14               Get.Along    0.02123       0.00086 black
15    Religious.Importance    0.02044       0.00089 black
16        English.Speaking    0.02008       0.00063 black
17 Community.Shares.Values    0.01954       0.00072 black
18              Expression    0.01950       0.00090 black
19    Religious.Attendance    0.01943       0.00082 black
20    Close.knit.Community    0.01633       0.00052 black
21          Family.Respect    0.01632       0.00096 black
22       Successful.Family    0.01439       0.00062 black
23          Discrimination    0.01343       0.00081 black
24              Feel.Close    0.01168       0.00046 black
25          Similar.Values    0.01069       0.00061 black
26     Spend.Time.Together    0.01060       0.00058 black
27            Togetherness    0.01015       0.00062 black
28                   Trust    0.00993       0.00056 black
29                 Loyalty    0.00966       0.00052 black
30                  Gender    0.00923       0.00060 black
31        Dental.Insurance    0.00800       0.00042 black
32            Family.Pride    0.00726       0.00037 black
33           Income_median    0.00635       0.00029 black
34        Health.Insurance    0.00355       0.00022 black
# Bar chart of the VSURF importance scores, bars ordered by importance, with
# error bars of +/- one sd_Importance and the bar colour taken from `fill`.
importance_plot <- ggplot(
  vi,
  aes(x = reorder(Variable, Importance), y = Importance, fill = fill)
) +
  geom_col(alpha = 0.4) +
  geom_errorbar(
    aes(ymin = Importance - sd_Importance, ymax = Importance + sd_Importance)
  ) +
  labs(title = "Variable Importance", x = "Variable", y = "Importance") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  # Named values tie each colour to its own label instead of relying on the
  # alphabetical ordering of the values found in `fill`.
  scale_fill_manual(values = c(black = "black", red = "red"), guide = "none")

plot(importance_plot)

# Pass the plot explicitly rather than relying on ggsave()'s last_plot()
# default, so the saved figure does not depend on what was drawn last.
# NOTE(review): the Health Professionals figure is saved as 12 x 8 in;
# confirm that the portrait 8 x 12 size here is intended.
ggsave(
  filename = "VSURF_importance_family.png", plot = importance_plot,
  width = 8, height = 12, units = "in"
)

Logistic regression (Interpretation)

# Logistic regression of `Family` on the predictors kept at the VSURF
# prediction step, fitted to `rfdata` (not the ROSE data).
lr <- select(
  rfdata,
  all_of(c("Family", names(rfdata)[-1][vsurf.mod$varselect.pred]))
)

lr_mod <- glm(Family ~ ., family = binomial, data = lr)
summary(lr_mod)

Call:
glm(formula = Family ~ ., family = binomial, data = lr)

Coefficients:
                                        Estimate Std. Error z value Pr(>|z|)
(Intercept)                            -0.151943   0.639958  -0.237 0.812326
EthnicityAsian Indian                  -0.192905   0.296161  -0.651 0.514820
EthnicityFilipino                      -0.446187   0.225409  -1.979 0.047765
EthnicityKorean                        -0.741733   0.169726  -4.370 1.24e-05
EthnicityOther                         -0.859275   0.249074  -3.450 0.000561
EthnicityVietnamese                    -0.746836   0.183016  -4.081 4.49e-05
Age                                    -0.015818   0.003318  -4.767 1.87e-06
See.Friends1                           -0.853346   0.336875  -2.533 0.011305
See.Friends2                           -0.843787   0.313717  -2.690 0.007153
See.Friends3                           -0.703854   0.306124  -2.299 0.021491
See.Friends4                           -0.570473   0.321137  -1.776 0.075664
See.Friends5                           -0.431305   0.329648  -1.308 0.190745
Close.Family1                           0.587378   0.242732   2.420 0.015526
Close.Family2                           0.329151   0.235289   1.399 0.161836
Close.Family3                           0.485595   0.239437   2.028 0.042553
Close.Family4                           0.427955   0.288749   1.482 0.138313
Close.Family5                           0.348586   0.342392   1.018 0.308634
English.DifficultiesMuch                0.540725   0.163427   3.309 0.000937
English.DifficultiesNot much            0.145775   0.151769   0.961 0.336798
English.DifficultiesVery much          -0.192473   0.146085  -1.318 0.187658
See.Family1                             0.385263   0.307594   1.253 0.210386
See.Family2                             0.361555   0.261022   1.385 0.166006
See.Family3                             0.251662   0.242713   1.037 0.299796
See.Family4                             0.371602   0.259331   1.433 0.151879
See.Family5                             0.129077   0.276714   0.466 0.640882
ReligionBuddhist                        0.212525   0.192510   1.104 0.269606
ReligionCatholic                        0.188857   0.188028   1.004 0.315182
ReligionHindu                          -0.223376   0.307336  -0.727 0.467340
ReligionMuslim                         -0.092396   0.381967  -0.242 0.808862
ReligionOther                          -0.253753   0.402710  -0.630 0.528621
ReligionProtestant                      0.053290   0.158890   0.335 0.737329
Helpful.Family1                         0.184640   0.334258   0.552 0.580683
Helpful.Family2                         0.500517   0.322085   1.554 0.120187
Helpful.Family3                         0.602626   0.326501   1.846 0.064934
Helpful.Family4                         0.917988   0.355481   2.582 0.009812
Helpful.Family5                         0.826266   0.392080   2.107 0.035084
Helpful.Friends1                       -0.075096   0.279876  -0.268 0.788453
Helpful.Friends2                       -0.079572   0.265097  -0.300 0.764054
Helpful.Friends3                       -0.287158   0.269523  -1.065 0.286682
Helpful.Friends4                       -0.200507   0.303249  -0.661 0.508487
Helpful.Friends5                       -0.214294   0.323670  -0.662 0.507922
Helpful.CommunityAgree                  0.823976   0.455258   1.810 0.070310
Helpful.CommunityDisagree               1.163599   0.463733   2.509 0.012100
Helpful.CommunityNeutral                0.807602   0.451168   1.790 0.073450
Helpful.CommunityStrongly agree         0.883474   0.491665   1.797 0.072351
Full.Time.EmploymentEmployed full time -0.410523   0.101625  -4.040 5.35e-05
Community.TrustAgree                   -0.006419   0.386045  -0.017 0.986733
Community.TrustDisagree                -0.008605   0.382796  -0.022 0.982065
Community.TrustNeutral                 -0.126423   0.375786  -0.336 0.736552
Community.TrustStrongly agree           0.061553   0.441427   0.139 0.889101
English.SpeakingNot well               -0.171237   0.252077  -0.679 0.496945
English.SpeakingVery well              -0.007144   0.262718  -0.027 0.978305
English.SpeakingWell                   -0.210886   0.249962  -0.844 0.398854
Successful.FamilySomewhat agree         0.131823   0.303391   0.435 0.663925
Successful.FamilySomewhat disagree      0.291325   0.336694   0.865 0.386901
Successful.FamilyStrongly agree         0.279404   0.303984   0.919 0.358022
                                          
(Intercept)                               
EthnicityAsian Indian                     
EthnicityFilipino                      *  
EthnicityKorean                        ***
EthnicityOther                         ***
EthnicityVietnamese                    ***
Age                                    ***
See.Friends1                           *  
See.Friends2                           ** 
See.Friends3                           *  
See.Friends4                           .  
See.Friends5                              
Close.Family1                          *  
Close.Family2                             
Close.Family3                          *  
Close.Family4                             
Close.Family5                             
English.DifficultiesMuch               ***
English.DifficultiesNot much              
English.DifficultiesVery much             
See.Family1                               
See.Family2                               
See.Family3                               
See.Family4                               
See.Family5                               
ReligionBuddhist                          
ReligionCatholic                          
ReligionHindu                             
ReligionMuslim                            
ReligionOther                             
ReligionProtestant                        
Helpful.Family1                           
Helpful.Family2                           
Helpful.Family3                        .  
Helpful.Family4                        ** 
Helpful.Family5                        *  
Helpful.Friends1                          
Helpful.Friends2                          
Helpful.Friends3                          
Helpful.Friends4                          
Helpful.Friends5                          
Helpful.CommunityAgree                 .  
Helpful.CommunityDisagree              *  
Helpful.CommunityNeutral               .  
Helpful.CommunityStrongly agree        .  
Full.Time.EmploymentEmployed full time ***
Community.TrustAgree                      
Community.TrustDisagree                   
Community.TrustNeutral                    
Community.TrustStrongly agree             
English.SpeakingNot well                  
English.SpeakingVery well                 
English.SpeakingWell                      
Successful.FamilySomewhat agree           
Successful.FamilySomewhat disagree        
Successful.FamilyStrongly agree           
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 2667.5  on 1925  degrees of freedom
Residual deviance: 2494.4  on 1870  degrees of freedom
AIC: 2606.4

Number of Fisher Scoring iterations: 4
# Analysis-of-deviance table: one test per predictor of the fitted model.
car::Anova(mod = lr_mod)
Analysis of Deviance Table (Type II tests)

Response: Family
                     LR Chisq Df Pr(>Chisq)    
Ethnicity             30.9275  5  9.681e-06 ***
Age                   22.9842  1  1.633e-06 ***
See.Friends           11.6087  5  0.0405619 *  
Close.Family           7.3520  5  0.1957467    
English.Difficulties  20.2105  3  0.0001535 ***
See.Family             4.2583  5  0.5128586    
Religion               3.5836  6  0.7328123    
Helpful.Family        10.7345  5  0.0569065 .  
Helpful.Friends        2.7763  5  0.7344305    
Helpful.Community      7.5326  4  0.1102821    
Full.Time.Employment  16.4111  1  5.098e-05 ***
Community.Trust        1.2493  4  0.8699092    
English.Speaking       2.6321  3  0.4518928    
Successful.Family      2.6171  3  0.4544932    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Exponentiated coefficients with confidence intervals, shown as an
# interactive table. TRUE is spelled out because T is a reassignable variable.
broom::tidy(lr_mod, exponentiate = TRUE, conf.int = TRUE) |>
  DT::datatable()

Health Professionals

# Modelling frame for the `Heal Professionals` outcome: keep the response and
# the candidate predictors, then remove every row that has a missing value.
rfdata <- qol |>
  select(
    `Heal Professionals`, Ethnicity, Age, Gender, Religion,
    `Full Time Employment`, Income_median, `English Speaking`,
    `English Difficulties`, `See Family`:`Community Trust`,
    `Health Insurance`, `Dental Insurance`, `Discrimination`
  ) |>
  na.omit() |>
  as.data.frame() |>
  # Syntactically valid column names so every column can appear in a formula.
  rename_with(make.names)

# ROSE-generated data set (fixed seed) on which variable selection is run.
imbal <- ROSE::ROSE(Heal.Professionals ~ ., data = rfdata, seed = 3)$data

# VSURF variable selection on the ROSE data. TRUE/FALSE are spelled out
# because T and F are ordinary, reassignable variables.
vsurf.mod <- VSURF(
  Heal.Professionals ~ ., imbal,
  na.action = "na.omit", parallel = TRUE, verbose = FALSE
)
Warning in VSURF.formula(Heal.Professionals ~ ., imbal, na.action = "na.omit", : VSURF with a formula-type call outputs selected variables
which are indices of the input matrix based on the formula:
you may reorder these to get indices of the original data
# Timing and number of variables kept at each VSURF step.
summary(vsurf.mod)

 VSURF computation time: 32.6 secs 

 VSURF selected: 
    34 variables at thresholding step (in 8.6 secs)
    15 variables at interpretation step (in 6.7 secs)
    13 variables at prediction step (in 17.2 secs)

 VSURF ran in parallel on a PSOCK cluster and used 15 cores 
# Predictors kept at the prediction step. The VSURF indices count predictors
# only, so the response (first column) is dropped before indexing.
names(rfdata)[-1][vsurf.mod$varselect.pred]
 [1] "English.Speaking"     "Dental.Insurance"     "Religion"            
 [4] "English.Difficulties" "See.Family"           "Helpful.Friends"     
 [7] "Close.Family"         "See.Friends"          "Helpful.Family"      
[10] "Close.Friends"        "Community.Trust"      "Get.Along"           
[13] "Health.Insurance"    
# Predictors kept at the interpretation step (response column excluded before
# indexing, as the VSURF indices count predictors only).
names(rfdata)[-1][vsurf.mod$varselect.interp]
 [1] "English.Speaking"     "Ethnicity"            "Dental.Insurance"    
 [4] "Religion"             "English.Difficulties" "See.Family"          
 [7] "Helpful.Friends"      "Close.Family"         "See.Friends"         
[10] "Helpful.Family"       "Close.Friends"        "Community.Trust"     
[13] "Religious.Attendance" "Get.Along"            "Health.Insurance"    
# Diagnostic plots for the VSURF fit.
plot(x = vsurf.mod)

# `mean.perf` component of the fit (presumably the mean OOB error of forests
# grown on all variables; confirm against the VSURF documentation).
vsurf.mod[["mean.perf"]]
[1] 0.1527504

Importance

# Importance table: one row per predictor with its mean importance and the
# standard deviation of that importance, in the order given by
# `imp.mean.dec.ind`.
vi <- mutate(
  data.frame(
    Variable = names(rfdata)[-1][vsurf.mod$imp.mean.dec.ind],
    Importance = vsurf.mod$imp.mean.dec,
    sd_Importance = vsurf.mod$imp.sd.dec
  ),
  # Bar colour: Ethnicity is highlighted in red, everything else is black.
  fill = if_else(Variable == "Ethnicity", "red", "black", missing = "black")
)

# Print the table with both numeric columns rounded to five decimals.
mutate(vi, across(c(Importance, sd_Importance), \(col) round(col, 5)))
                  Variable Importance sd_Importance  fill
1         English.Speaking    0.06230       0.00164 black
2                Ethnicity    0.05856       0.00108   red
3         Dental.Insurance    0.05608       0.00154 black
4                 Religion    0.05139       0.00092 black
5     English.Difficulties    0.04493       0.00118 black
6               See.Family    0.04208       0.00069 black
7          Helpful.Friends    0.03669       0.00063 black
8             Close.Family    0.03392       0.00066 black
9              See.Friends    0.03375       0.00073 black
10          Helpful.Family    0.03335       0.00064 black
11           Close.Friends    0.03258       0.00072 black
12         Community.Trust    0.02832       0.00089 black
13    Religious.Attendance    0.02703       0.00076 black
14               Get.Along    0.02444       0.00072 black
15        Health.Insurance    0.02439       0.00085 black
16                     Age    0.02324       0.00068 black
17       Helpful.Community    0.02284       0.00082 black
18    Close.knit.Community    0.02095       0.00071 black
19 Community.Shares.Values    0.01993       0.00078 black
20           Income_median    0.01901       0.00089 black
21    Religious.Importance    0.01819       0.00061 black
22     Spend.Time.Together    0.01746       0.00066 black
23       Successful.Family    0.01062       0.00051 black
24              Expression    0.01060       0.00044 black
25          Family.Respect    0.01034       0.00055 black
26              Feel.Close    0.00951       0.00054 black
27            Family.Pride    0.00923       0.00045 black
28          Similar.Values    0.00919       0.00034 black
29                 Loyalty    0.00844       0.00049 black
30                   Trust    0.00835       0.00039 black
31                  Gender    0.00811       0.00044 black
32    Full.Time.Employment    0.00740       0.00049 black
33            Togetherness    0.00727       0.00033 black
34          Discrimination    0.00589       0.00036 black
# Bar chart of the VSURF importance scores, bars ordered by importance, with
# error bars of +/- one sd_Importance and the bar colour taken from `fill`.
importance_plot <- ggplot(
  vi,
  aes(x = reorder(Variable, Importance), y = Importance, fill = fill)
) +
  geom_col(alpha = 0.4) +
  geom_errorbar(
    aes(ymin = Importance - sd_Importance, ymax = Importance + sd_Importance)
  ) +
  labs(title = "Variable Importance", x = "Variable", y = "Importance") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  # Named values tie each colour to its own label instead of relying on the
  # alphabetical ordering of the values found in `fill`.
  scale_fill_manual(values = c(black = "black", red = "red"), guide = "none")

plot(importance_plot)

# Pass the plot explicitly rather than relying on ggsave()'s last_plot()
# default, so the saved figure does not depend on what was drawn last.
ggsave(
  filename = "VSURF_importance_hp.png", plot = importance_plot,
  width = 12, height = 8, units = "in"
)

Logistic regression (Interpretation)

# Logistic regression of `Heal.Professionals` on the predictors kept at the
# VSURF prediction step, fitted to `rfdata` (not the ROSE data).
lr <- select(
  rfdata,
  all_of(c("Heal.Professionals", names(rfdata)[-1][vsurf.mod$varselect.pred]))
)

lr_mod <- glm(Heal.Professionals ~ ., family = binomial, data = lr)
summary(lr_mod)

Call:
glm(formula = Heal.Professionals ~ ., family = binomial, data = lr)

Coefficients:
                               Estimate Std. Error z value Pr(>|z|)    
(Intercept)                   -2.300882   0.569952  -4.037 5.41e-05 ***
English.SpeakingNot well       0.251551   0.264881   0.950 0.342277    
English.SpeakingVery well      1.099407   0.266594   4.124 3.73e-05 ***
English.SpeakingWell           0.845659   0.258054   3.277 0.001049 ** 
Dental.InsuranceYes            0.312464   0.114703   2.724 0.006447 ** 
ReligionBuddhist               0.467136   0.178422   2.618 0.008841 ** 
ReligionCatholic               0.349362   0.163092   2.142 0.032185 *  
ReligionHindu                 -0.468336   0.168810  -2.774 0.005531 ** 
ReligionMuslim                 0.267734   0.332710   0.805 0.420988    
ReligionOther                  0.422495   0.414152   1.020 0.307660    
ReligionProtestant             0.091709   0.147868   0.620 0.535121    
English.DifficultiesMuch      -0.442586   0.164929  -2.683 0.007286 ** 
English.DifficultiesNot much  -0.302170   0.153173  -1.973 0.048526 *  
English.DifficultiesVery much -0.305888   0.147497  -2.074 0.038092 *  
See.Family1                    0.266631   0.309559   0.861 0.389060    
See.Family2                    0.397140   0.265486   1.496 0.134681    
See.Family3                    0.363831   0.246453   1.476 0.139872    
See.Family4                    0.506602   0.263155   1.925 0.054216 .  
See.Family5                    0.380649   0.280586   1.357 0.174901    
Helpful.Friends1              -0.495859   0.311957  -1.590 0.111946    
Helpful.Friends2              -0.241957   0.297771  -0.813 0.416470    
Helpful.Friends3              -0.120127   0.307556  -0.391 0.696103    
Helpful.Friends4              -0.113578   0.350369  -0.324 0.745811    
Helpful.Friends5               0.006169   0.386592   0.016 0.987267    
Close.Family1                 -0.012196   0.242751  -0.050 0.959930    
Close.Family2                  0.051024   0.234865   0.217 0.828015    
Close.Family3                 -0.031272   0.239950  -0.130 0.896306    
Close.Family4                  0.070974   0.291916   0.243 0.807904    
Close.Family5                  0.057326   0.353754   0.162 0.871267    
See.Friends1                   0.373373   0.356221   1.048 0.294569    
See.Friends2                   0.350593   0.340794   1.029 0.303595    
See.Friends3                   0.587777   0.331973   1.771 0.076635 .  
See.Friends4                   0.602412   0.345953   1.741 0.081629 .  
See.Friends5                   0.801026   0.352119   2.275 0.022914 *  
Helpful.Family1               -0.068213   0.326959  -0.209 0.834738    
Helpful.Family2               -0.106419   0.315159  -0.338 0.735613    
Helpful.Family3               -0.231407   0.318939  -0.726 0.468113    
Helpful.Family4                0.004491   0.350278   0.013 0.989772    
Helpful.Family5               -0.175251   0.386057  -0.454 0.649864    
Close.Friends1                 0.580733   0.268130   2.166 0.030322 *  
Close.Friends2                 0.275533   0.265235   1.039 0.298887    
Close.Friends3                 0.226748   0.274193   0.827 0.408257    
Close.Friends4                 0.380765   0.334195   1.139 0.254558    
Close.Friends5                -0.300196   0.379675  -0.791 0.429139    
Community.TrustAgree           0.791737   0.469397   1.687 0.091659 .  
Community.TrustDisagree        0.582946   0.452785   1.287 0.197931    
Community.TrustNeutral         0.646587   0.456046   1.418 0.156246    
Community.TrustStrongly agree  0.416849   0.542431   0.768 0.442201    
Get.AlongAgree                -0.519597   0.530808  -0.979 0.327640    
Get.AlongDisagree             -0.439306   0.516476  -0.851 0.395001    
Get.AlongNeutral              -0.584303   0.521787  -1.120 0.262794    
Get.AlongStrongly agree       -0.306626   0.589134  -0.520 0.602736    
Health.InsuranceYes            0.626447   0.167038   3.750 0.000177 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 2668.3  on 1926  degrees of freedom
Residual deviance: 2421.6  on 1874  degrees of freedom
AIC: 2527.6

Number of Fisher Scoring iterations: 4
# Analysis-of-deviance table: one test per predictor of the fitted model.
car::Anova(mod = lr_mod)
Analysis of Deviance Table (Type II tests)

Response: Heal.Professionals
                     LR Chisq Df Pr(>Chisq)    
English.Speaking       36.073  3  7.226e-08 ***
Dental.Insurance        7.401  1  0.0065189 ** 
Religion               35.309  6  3.754e-06 ***
English.Difficulties    8.324  3  0.0397713 *  
See.Family              4.035  5  0.5443406    
Helpful.Friends         4.059  5  0.5408867    
Close.Family            0.536  5  0.9907295    
See.Friends             7.721  5  0.1722914    
Helpful.Family          2.594  5  0.7623492    
Close.Friends           9.996  5  0.0753429 .  
Community.Trust         4.356  4  0.3599330    
Get.Along               2.117  4  0.7142667    
Health.Insurance       14.439  1  0.0001448 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Exponentiated coefficients with confidence intervals, shown as an
# interactive table. TRUE is spelled out because T is a reassignable variable.
broom::tidy(lr_mod, exponentiate = TRUE, conf.int = TRUE) |>
  DT::datatable()

Physical Check-up

# install.packages("randomForestSRC")

# Outcome (first column) plus candidate predictors for the physical
# check-up analysis. Rows with any missing value are dropped, three long
# column names are shortened, and all names are made syntactic so they can
# be used in model formulas.
rfdata <- qol |>
  select(
    `Physical Check-up`, Ethnicity, Age, Gender, Religion,
    `Full Time Employment`, Income_median, `English Speaking`,
    `English Difficulties`, `See Family`:`Community Trust`,
    `Health Insurance`, `Dental Insurance`, `Discrimination`
  ) |>
  na.omit() |>
  rename(
    Employment = `Full Time Employment`,
    EnglishSpeak = `English Speaking`,
    EnglishDiff = `English Difficulties`
  ) |>
  as.data.frame() |>
  rename_with(make.names)

# Synthetic resample produced by ROSE (the `$data` element of its result);
# `seed` fixes the resampling so this step is repeatable.
imbal <- ROSE::ROSE(Physical.Check.up ~ ., data = rfdata, seed = 3)$data

# Random-forest variable selection on the resampled data.
# NOTE(review): no RNG seed is set for VSURF itself, so selections and
# importances can differ between runs.
# NOTE(review): VSURF reports selected variables as column indices of the
# predictor matrix built from this formula (see the warning it emits).
# The name look-ups further down index into `rfdata`, which assumes `imbal`
# keeps the same predictor column order as `rfdata` -- TODO confirm.
vsurf.mod <- VSURF(
  Physical.Check.up ~ .,
  imbal,
  na.action = "na.omit",
  parallel = TRUE,
  verbose = FALSE
)
Warning in VSURF.formula(Physical.Check.up ~ ., imbal, na.action = "na.omit", : VSURF with a formula-type call outputs selected variables
which are indices of the input matrix based on the formula:
you may reorder these to get indices of the original data
# Timing and number of variables kept at each VSURF step.
summary(vsurf.mod)

 VSURF computation time: 32.2 secs 

 VSURF selected: 
    34 variables at thresholding step (in 7.9 secs)
    17 variables at interpretation step (in 6.4 secs)
    15 variables at prediction step (in 17.9 secs)

 VSURF ran in parallel on a PSOCK cluster and used 15 cores 
# Predictors kept at VSURF's prediction step. The indices count predictor
# columns only, so the outcome (first column of rfdata) is dropped first.
names(rfdata)[-1][vsurf.mod$varselect.pred]
 [1] "Dental.Insurance"        "Age"                    
 [3] "Health.Insurance"        "Helpful.Family"         
 [5] "Religion"                "Income_median"          
 [7] "EnglishDiff"             "See.Family"             
 [9] "Close.Family"            "Close.Friends"          
[11] "EnglishSpeak"            "See.Friends"            
[13] "Community.Shares.Values" "Close.knit.Community"   
[15] "Community.Trust"        
# Predictors kept at VSURF's interpretation step (outcome column dropped
# before indexing, as above).
names(rfdata)[-1][vsurf.mod$varselect.interp]
 [1] "Dental.Insurance"        "Age"                    
 [3] "Health.Insurance"        "Ethnicity"              
 [5] "Helpful.Family"          "Religion"               
 [7] "Income_median"           "EnglishDiff"            
 [9] "See.Family"              "Close.Family"           
[11] "Close.Friends"           "EnglishSpeak"           
[13] "See.Friends"             "Helpful.Friends"        
[15] "Community.Shares.Values" "Close.knit.Community"   
[17] "Community.Trust"        
# Diagnostic plots for the VSURF run.
vsurf.mod |> plot()

# `mean.perf` component of the fit (documented by VSURF as the mean OOB
# error rate of forests grown on all variables).
vsurf.mod[["mean.perf"]]
[1] 0.1037278

Importance

# Importance table: one row per predictor, in the order VSURF reports them,
# with the mean importance and its standard deviation. `fill` marks
# Ethnicity so it can be highlighted in the bar chart.
vi <- data.frame(
  Variable = names(rfdata)[-1][vsurf.mod$imp.mean.dec.ind],
  Importance = vsurf.mod$imp.mean.dec,
  sd_Importance = vsurf.mod$imp.sd.dec
) |>
  mutate(
    fill = if_else(Variable == "Ethnicity", "red", "black", missing = "black")
  )

# Print the table rounded to five decimals (vi itself is left unrounded).
vi |>
  mutate(across(c(Importance, sd_Importance), \(x) round(x, 5)))
                  Variable Importance sd_Importance  fill
1         Dental.Insurance    0.09161       0.00198 black
2                      Age    0.07237       0.00111 black
3         Health.Insurance    0.05501       0.00159 black
4                Ethnicity    0.05173       0.00135   red
5           Helpful.Family    0.04063       0.00081 black
6                 Religion    0.03983       0.00104 black
7            Income_median    0.03790       0.00174 black
8              EnglishDiff    0.03604       0.00103 black
9               See.Family    0.03404       0.00071 black
10            Close.Family    0.02988       0.00086 black
11           Close.Friends    0.02695       0.00078 black
12            EnglishSpeak    0.02679       0.00090 black
13             See.Friends    0.02582       0.00071 black
14         Helpful.Friends    0.02509       0.00055 black
15 Community.Shares.Values    0.02423       0.00088 black
16    Close.knit.Community    0.02380       0.00059 black
17         Community.Trust    0.02342       0.00073 black
18               Get.Along    0.02267       0.00098 black
19    Religious.Attendance    0.01972       0.00051 black
20       Helpful.Community    0.01717       0.00050 black
21                  Gender    0.01704       0.00088 black
22    Religious.Importance    0.01701       0.00055 black
23              Expression    0.01411       0.00074 black
24       Successful.Family    0.01194       0.00057 black
25     Spend.Time.Together    0.01053       0.00038 black
26          Similar.Values    0.00962       0.00040 black
27          Discrimination    0.00956       0.00059 black
28          Family.Respect    0.00886       0.00037 black
29              Feel.Close    0.00863       0.00038 black
30              Employment    0.00834       0.00055 black
31                 Loyalty    0.00778       0.00041 black
32            Family.Pride    0.00760       0.00030 black
33                   Trust    0.00668       0.00038 black
34            Togetherness    0.00634       0.00029 black
# Bar chart of VSURF importance, bars ordered by importance, with error
# bars of +/- one standard deviation. Ethnicity is drawn in red.
importance_plot <- ggplot(
  vi,
  aes(x = reorder(Variable, Importance), y = Importance, fill = fill)
) +
  geom_col(alpha = 0.4) +
  geom_errorbar(
    aes(ymin = Importance - sd_Importance, ymax = Importance + sd_Importance)
  ) +
  labs(title = "Variable Importance", x = "Variable", y = "Importance") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  # Named values tie each colour to its `fill` level explicitly instead of
  # relying on the alphabetical order of the levels.
  scale_fill_manual(
    values = c(black = "black", red = "red"),
    guide = "none"
  )

print(importance_plot)

# Pass the plot explicitly so the saved file does not depend on whichever
# plot happened to be displayed last.
ggsave(
  filename = "VSURF_importance_PC.png",
  plot = importance_plot,
  width = 12,
  height = 8,
  units = "in"
)

Logistic regression (Interpretation)

# Logistic regression on the original (non-resampled) rfdata, restricted to
# the outcome and the predictors VSURF kept at its prediction step.
lr <- rfdata |>
  select(
    Physical.Check.up,
    all_of(names(rfdata)[-1][vsurf.mod$varselect.pred])
  )

lr_mod <- glm(Physical.Check.up~.,family=binomial,data=lr)
summary(lr_mod)

Call:
glm(formula = Physical.Check.up ~ ., family = binomial, data = lr)

Coefficients:
                                       Estimate Std. Error z value Pr(>|z|)    
(Intercept)                           -2.885273   0.643835  -4.481 7.42e-06 ***
Dental.InsuranceYes                    0.618889   0.130749   4.733 2.21e-06 ***
Age                                    0.037943   0.004081   9.297  < 2e-16 ***
Health.InsuranceYes                    1.235977   0.168603   7.331 2.29e-13 ***
Helpful.Family1                       -0.031385   0.363957  -0.086  0.93128    
Helpful.Family2                       -0.395079   0.345046  -1.145  0.25221    
Helpful.Family3                       -0.151513   0.346810  -0.437  0.66220    
Helpful.Family4                       -0.361587   0.376575  -0.960  0.33695    
Helpful.Family5                       -0.312161   0.413601  -0.755  0.45041    
ReligionBuddhist                       0.211883   0.201085   1.054  0.29202    
ReligionCatholic                       0.152188   0.183801   0.828  0.40767    
ReligionHindu                         -0.220516   0.187516  -1.176  0.23960    
ReligionMuslim                        -0.052847   0.386113  -0.137  0.89113    
ReligionOther                         -0.015476   0.452428  -0.034  0.97271    
ReligionProtestant                    -0.277278   0.161935  -1.712  0.08684 .  
Income_medianAbove                     0.273903   0.122624   2.234  0.02550 *  
EnglishDiffMuch                       -0.180242   0.189759  -0.950  0.34219    
EnglishDiffNot much                   -0.534471   0.175164  -3.051  0.00228 ** 
EnglishDiffVery much                  -0.315604   0.168328  -1.875  0.06080 .  
See.Family1                            0.265111   0.355056   0.747  0.45526    
See.Family2                           -0.197328   0.290885  -0.678  0.49754    
See.Family3                           -0.105315   0.271954  -0.387  0.69857    
See.Family4                           -0.025685   0.292231  -0.088  0.92996    
See.Family5                           -0.033497   0.313288  -0.107  0.91485    
Close.Family1                          0.023836   0.269928   0.088  0.92964    
Close.Family2                          0.361724   0.260074   1.391  0.16427    
Close.Family3                          0.204018   0.267513   0.763  0.44568    
Close.Family4                          0.136660   0.324798   0.421  0.67394    
Close.Family5                         -0.238072   0.389460  -0.611  0.54101    
Close.Friends1                         0.246674   0.271003   0.910  0.36270    
Close.Friends2                        -0.016481   0.264998  -0.062  0.95041    
Close.Friends3                         0.015876   0.266783   0.060  0.95255    
Close.Friends4                         0.183421   0.323805   0.566  0.57108    
Close.Friends5                         0.031268   0.352851   0.089  0.92939    
EnglishSpeakNot well                   0.487198   0.266247   1.830  0.06727 .  
EnglishSpeakVery well                  0.920756   0.285594   3.224  0.00126 ** 
EnglishSpeakWell                       0.760066   0.266735   2.850  0.00438 ** 
See.Friends1                           0.273638   0.370295   0.739  0.45992    
See.Friends2                           0.193270   0.345511   0.559  0.57591    
See.Friends3                           0.234502   0.334260   0.702  0.48296    
See.Friends4                           0.207026   0.350242   0.591  0.55446    
See.Friends5                           0.485086   0.356099   1.362  0.17313    
Community.Shares.ValuesAgree           0.472854   0.482594   0.980  0.32718    
Community.Shares.ValuesDisagree        0.124787   0.472332   0.264  0.79163    
Community.Shares.ValuesNeutral         0.362194   0.471921   0.767  0.44279    
Community.Shares.ValuesStrongly agree  0.500342   0.551903   0.907  0.36463    
Close.knit.CommunityAgree              0.148911   0.377084   0.395  0.69291    
Close.knit.CommunityDisagree           0.375671   0.379135   0.991  0.32175    
Close.knit.CommunityNeutral           -0.046356   0.366989  -0.126  0.89948    
Close.knit.CommunityStrongly agree     0.526118   0.440069   1.196  0.23188    
Community.TrustAgree                  -0.477934   0.437691  -1.092  0.27486    
Community.TrustDisagree               -0.277239   0.430302  -0.644  0.51939    
Community.TrustNeutral                -0.494929   0.420999  -1.176  0.23975    
Community.TrustStrongly agree         -0.924469   0.519058  -1.781  0.07490 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 2405.0  on 1917  degrees of freedom
Residual deviance: 2054.1  on 1864  degrees of freedom
AIC: 2162.1

Number of Fisher Scoring iterations: 4
# Type II likelihood-ratio tests, one row per model term.
lr_mod |> car::Anova()
Analysis of Deviance Table (Type II tests)

Response: Physical.Check.up
                        LR Chisq Df Pr(>Chisq)    
Dental.Insurance          22.270  1  2.369e-06 ***
Age                       96.446  1  < 2.2e-16 ***
Health.Insurance          56.073  1  6.982e-14 ***
Helpful.Family             4.924  5   0.425225    
Religion                  11.242  6   0.081191 .  
Income_median              4.974  1   0.025735 *  
EnglishDiff               11.635  3   0.008744 ** 
See.Family                 2.778  5   0.734096    
Close.Family               6.128  5   0.293966    
Close.Friends              2.501  5   0.776273    
EnglishSpeak              11.981  3   0.007449 ** 
See.Friends                3.463  5   0.628955    
Community.Shares.Values    3.037  4   0.551656    
Close.knit.Community       8.204  4   0.084401 .  
Community.Trust            4.263  4   0.371563    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Coefficient table with exponentiated estimates (odds ratios for this
# binomial model) and confidence intervals, shown as an interactive table.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
broom::tidy(lr_mod, exponentiate = TRUE, conf.int = TRUE) |>
  DT::datatable()

Dental Check-up

# Outcome (first column) plus candidate predictors for the dentist
# check-up analysis. Rows with any missing value are dropped and all names
# are made syntactic so they can be used in model formulas.
rfdata <- qol |>
  select(
    `Dentist Check-up`, Ethnicity, Age, Gender, Religion,
    `Full Time Employment`, Income_median, `English Speaking`,
    `English Difficulties`, `See Family`:`Community Trust`,
    `Health Insurance`, `Dental Insurance`, `Discrimination`
  ) |>
  na.omit() |>
  as.data.frame() |>
  rename_with(make.names)

# Synthetic resample produced by ROSE (the `$data` element of its result);
# `seed` fixes the resampling so this step is repeatable.
imbal <- ROSE::ROSE(Dentist.Check.up ~ ., data = rfdata, seed = 3)$data

# Random-forest variable selection on the resampled data.
# NOTE(review): no RNG seed is set for VSURF itself, so selections and
# importances can differ between runs.
vsurf.mod <- VSURF(
  Dentist.Check.up ~ .,
  imbal,
  na.action = "na.omit",
  parallel = TRUE,
  verbose = FALSE
)
Warning in VSURF.formula(Dentist.Check.up ~ ., imbal, na.action = "na.omit", : VSURF with a formula-type call outputs selected variables
which are indices of the input matrix based on the formula:
you may reorder these to get indices of the original data
# Timing and number of variables kept at each VSURF step.
summary(vsurf.mod)

 VSURF computation time: 23.4 secs 

 VSURF selected: 
    34 variables at thresholding step (in 8 secs)
    18 variables at interpretation step (in 7.1 secs)
    7 variables at prediction step (in 8.3 secs)

 VSURF ran in parallel on a PSOCK cluster and used 15 cores 
# Predictors kept at VSURF's prediction step. The indices count predictor
# columns only, so the outcome (first column of rfdata) is dropped first.
names(rfdata)[-1][vsurf.mod$varselect.pred]
[1] "Dental.Insurance"        "Ethnicity"              
[3] "English.Speaking"        "Health.Insurance"       
[5] "Community.Trust"         "Community.Shares.Values"
[7] "Close.knit.Community"   
# Predictors kept at VSURF's interpretation step (outcome column dropped
# before indexing).
names(rfdata)[-1][vsurf.mod$varselect.interp]
 [1] "Dental.Insurance"        "Ethnicity"              
 [3] "Religion"                "Age"                    
 [5] "See.Friends"             "See.Family"             
 [7] "Close.Friends"           "Income_median"          
 [9] "Helpful.Family"          "Close.Family"           
[11] "English.Difficulties"    "Helpful.Friends"        
[13] "Religious.Importance"    "English.Speaking"       
[15] "Health.Insurance"        "Community.Trust"        
[17] "Community.Shares.Values" "Close.knit.Community"   
# Diagnostic plots for the VSURF run.
vsurf.mod |> plot()

# `mean.perf` component of the fit (documented by VSURF as the mean OOB
# error rate of forests grown on all variables).
vsurf.mod[["mean.perf"]]
[1] 0.111906

Importance

# Importance table: one row per predictor, in the order VSURF reports them,
# with the mean importance and its standard deviation. `fill` marks
# Ethnicity so it can be highlighted in the bar chart.
vi <- data.frame(
  Variable = names(rfdata)[-1][vsurf.mod$imp.mean.dec.ind],
  Importance = vsurf.mod$imp.mean.dec,
  sd_Importance = vsurf.mod$imp.sd.dec
) |>
  mutate(
    fill = if_else(Variable == "Ethnicity", "red", "black", missing = "black")
  )

# Print the table rounded to five decimals (vi itself is left unrounded).
vi |>
  mutate(across(c(Importance, sd_Importance), \(x) round(x, 5)))
                  Variable Importance sd_Importance  fill
1         Dental.Insurance    0.11737       0.00143 black
2                Ethnicity    0.06255       0.00156   red
3                 Religion    0.05053       0.00114 black
4                      Age    0.04821       0.00137 black
5              See.Friends    0.04104       0.00088 black
6               See.Family    0.03811       0.00089 black
7            Close.Friends    0.03651       0.00065 black
8            Income_median    0.03501       0.00190 black
9           Helpful.Family    0.03278       0.00070 black
10            Close.Family    0.03266       0.00104 black
11    English.Difficulties    0.02789       0.00074 black
12         Helpful.Friends    0.02611       0.00062 black
13    Religious.Importance    0.02575       0.00072 black
14        English.Speaking    0.02446       0.00074 black
15        Health.Insurance    0.02414       0.00108 black
16         Community.Trust    0.02173       0.00067 black
17 Community.Shares.Values    0.01749       0.00037 black
18    Close.knit.Community    0.01743       0.00059 black
19               Get.Along    0.01634       0.00051 black
20    Religious.Attendance    0.01588       0.00053 black
21       Helpful.Community    0.01526       0.00050 black
22                  Gender    0.01339       0.00067 black
23     Spend.Time.Together    0.01195       0.00041 black
24              Feel.Close    0.01184       0.00050 black
25              Expression    0.00993       0.00036 black
26          Similar.Values    0.00980       0.00044 black
27       Successful.Family    0.00921       0.00044 black
28          Discrimination    0.00783       0.00038 black
29    Full.Time.Employment    0.00736       0.00050 black
30                   Trust    0.00730       0.00033 black
31          Family.Respect    0.00679       0.00034 black
32            Family.Pride    0.00640       0.00026 black
33            Togetherness    0.00596       0.00035 black
34                 Loyalty    0.00520       0.00025 black
# Bar chart of VSURF importance, bars ordered by importance, with error
# bars of +/- one standard deviation. Ethnicity is drawn in red.
importance_plot <- ggplot(
  vi,
  aes(x = reorder(Variable, Importance), y = Importance, fill = fill)
) +
  geom_col(alpha = 0.4) +
  geom_errorbar(
    aes(ymin = Importance - sd_Importance, ymax = Importance + sd_Importance)
  ) +
  labs(title = "Variable Importance", x = "Variable", y = "Importance") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  # Named values tie each colour to its `fill` level explicitly instead of
  # relying on the alphabetical order of the levels.
  scale_fill_manual(
    values = c(black = "black", red = "red"),
    guide = "none"
  )

print(importance_plot)

# Pass the plot explicitly so the saved file does not depend on whichever
# plot happened to be displayed last.
ggsave(
  filename = "VSURF_importance_Dc.png",
  plot = importance_plot,
  width = 12,
  height = 8,
  units = "in"
)

Logistic regression (Interpretation)

# Logistic regression on the original (non-resampled) rfdata, restricted to
# the outcome and the predictors VSURF kept at its prediction step.
lr <- rfdata |>
  select(
    Dentist.Check.up,
    all_of(names(rfdata)[-1][vsurf.mod$varselect.pred])
  )

lr_mod <- glm(Dentist.Check.up~.,family=binomial,data=lr)
summary(lr_mod)

Call:
glm(formula = Dentist.Check.up ~ ., family = binomial, data = lr)

Coefficients:
                                      Estimate Std. Error z value Pr(>|z|)    
(Intercept)                           -1.69290    0.44772  -3.781 0.000156 ***
Dental.InsuranceYes                    1.38576    0.11872  11.672  < 2e-16 ***
EthnicityAsian Indian                 -1.32396    0.16662  -7.946 1.92e-15 ***
EthnicityFilipino                     -0.20775    0.21632  -0.960 0.336859    
EthnicityKorean                       -0.38852    0.16064  -2.419 0.015583 *  
EthnicityOther                        -0.42908    0.25378  -1.691 0.090879 .  
EthnicityVietnamese                   -0.08924    0.16700  -0.534 0.593081    
English.SpeakingNot well               0.86831    0.27087   3.206 0.001347 ** 
English.SpeakingVery well              1.30769    0.27446   4.765 1.89e-06 ***
English.SpeakingWell                   0.94587    0.27070   3.494 0.000476 ***
Health.InsuranceYes                    0.47126    0.16708   2.821 0.004794 ** 
Community.TrustAgree                   0.18361    0.41844   0.439 0.660809    
Community.TrustDisagree               -0.13516    0.41120  -0.329 0.742389    
Community.TrustNeutral                 0.13742    0.40449   0.340 0.734061    
Community.TrustStrongly agree         -0.22904    0.50261  -0.456 0.648604    
Community.Shares.ValuesAgree           0.39102    0.45290   0.863 0.387934    
Community.Shares.ValuesDisagree        0.21608    0.44427   0.486 0.626700    
Community.Shares.ValuesNeutral         0.27058    0.44223   0.612 0.540631    
Community.Shares.ValuesStrongly agree -0.02971    0.52298  -0.057 0.954692    
Close.knit.CommunityAgree             -0.24180    0.37320  -0.648 0.517052    
Close.knit.CommunityDisagree           0.01365    0.37557   0.036 0.971005    
Close.knit.CommunityNeutral           -0.19066    0.36411  -0.524 0.600528    
Close.knit.CommunityStrongly agree     0.67349    0.43246   1.557 0.119386    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 2593.0  on 1914  degrees of freedom
Residual deviance: 2204.4  on 1892  degrees of freedom
AIC: 2250.4

Number of Fisher Scoring iterations: 4
# Type II likelihood-ratio tests, one row per model term.
lr_mod |> car::Anova()
Analysis of Deviance Table (Type II tests)

Response: Dentist.Check.up
                        LR Chisq Df Pr(>Chisq)    
Dental.Insurance         141.886  1  < 2.2e-16 ***
Ethnicity                 83.650  5  < 2.2e-16 ***
English.Speaking          27.434  3  4.775e-06 ***
Health.Insurance           8.088  1   0.004455 ** 
Community.Trust            3.844  4   0.427558    
Community.Shares.Values    2.918  4   0.571617    
Close.knit.Community      14.337  4   0.006295 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Coefficient table with exponentiated estimates (odds ratios for this
# binomial model) and confidence intervals, shown as an interactive table.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
broom::tidy(lr_mod, exponentiate = TRUE, conf.int = TRUE) |>
  DT::datatable()

Folk Medicine

# Outcome (first column) plus candidate predictors for the folk-medicine
# analysis. Rows with any missing value are dropped and all names are made
# syntactic so they can be used in model formulas.
rfdata <- qol |>
  select(
    `Folkmedicine`, Ethnicity, Age, Gender, Religion,
    `Full Time Employment`, Income_median, `English Speaking`,
    `English Difficulties`, `See Family`:`Community Trust`,
    `Health Insurance`, `Dental Insurance`, `Discrimination`
  ) |>
  na.omit() |>
  as.data.frame() |>
  rename_with(make.names)

# Synthetic resample produced by ROSE (the `$data` element of its result);
# `seed` fixes the resampling so this step is repeatable.
imbal <- ROSE::ROSE(Folkmedicine ~ ., data = rfdata, seed = 3)$data

# Random-forest variable selection on the resampled data.
# NOTE(review): no RNG seed is set for VSURF itself, so selections and
# importances can differ between runs.
vsurf.mod <- VSURF(
  Folkmedicine ~ .,
  imbal,
  na.action = "na.omit",
  parallel = TRUE,
  verbose = FALSE
)
Warning in VSURF.formula(Folkmedicine ~ ., imbal, na.action = "na.omit", : VSURF with a formula-type call outputs selected variables
which are indices of the input matrix based on the formula:
you may reorder these to get indices of the original data
# Timing and number of variables kept at each VSURF step.
summary(vsurf.mod)

 VSURF computation time: 52.4 secs 

 VSURF selected: 
    34 variables at thresholding step (in 7.5 secs)
    28 variables at interpretation step (in 6.8 secs)
    15 variables at prediction step (in 38.1 secs)

 VSURF ran in parallel on a PSOCK cluster and used 15 cores 
# Predictors kept at VSURF's prediction step. The indices count predictor
# columns only, so the outcome (first column of rfdata) is dropped first.
names(rfdata)[-1][vsurf.mod$varselect.pred]
 [1] "Ethnicity"            "Age"                  "See.Friends"         
 [4] "Religion"             "English.Speaking"     "See.Family"          
 [7] "Helpful.Family"       "Full.Time.Employment" "Close.Family"        
[10] "Religious.Attendance" "Helpful.Friends"      "English.Difficulties"
[13] "Close.Friends"        "Get.Along"            "Religious.Importance"
# Predictors kept at VSURF's interpretation step (outcome column dropped
# before indexing).
names(rfdata)[-1][vsurf.mod$varselect.interp]
 [1] "Ethnicity"               "Age"                    
 [3] "See.Friends"             "Religion"               
 [5] "English.Speaking"        "See.Family"             
 [7] "Helpful.Family"          "Full.Time.Employment"   
 [9] "Close.Family"            "Religious.Attendance"   
[11] "Helpful.Friends"         "English.Difficulties"   
[13] "Close.Friends"           "Get.Along"              
[15] "Religious.Importance"    "Helpful.Community"      
[17] "Close.knit.Community"    "Community.Trust"        
[19] "Community.Shares.Values" "Discrimination"         
[21] "Feel.Close"              "Similar.Values"         
[23] "Expression"              "Gender"                 
[25] "Spend.Time.Together"     "Togetherness"           
[27] "Trust"                   "Successful.Family"      
# Diagnostic plots for the VSURF run.
vsurf.mod |> plot()

# `mean.perf` component of the fit (documented by VSURF as the mean OOB
# error rate of forests grown on all variables).
vsurf.mod[["mean.perf"]]
[1] 0.04842022

Importance

# Importance table: one row per predictor, in the order VSURF reports them,
# with the mean importance and its standard deviation. `fill` marks
# Ethnicity so it can be highlighted in the bar chart.
vi <- data.frame(
  Variable = names(rfdata)[-1][vsurf.mod$imp.mean.dec.ind],
  Importance = vsurf.mod$imp.mean.dec,
  sd_Importance = vsurf.mod$imp.sd.dec
) |>
  mutate(
    fill = if_else(Variable == "Ethnicity", "red", "black", missing = "black")
  )

# Print the table rounded to five decimals (vi itself is left unrounded).
vi |>
  mutate(across(c(Importance, sd_Importance), \(x) round(x, 5)))
                  Variable Importance sd_Importance  fill
1                Ethnicity    0.10602       0.00196   red
2                      Age    0.09538       0.00156 black
3              See.Friends    0.06796       0.00142 black
4                 Religion    0.06783       0.00227 black
5         English.Speaking    0.06154       0.00200 black
6               See.Family    0.05878       0.00101 black
7           Helpful.Family    0.05443       0.00122 black
8     Full.Time.Employment    0.05117       0.00174 black
9             Close.Family    0.04806       0.00086 black
10    Religious.Attendance    0.04505       0.00146 black
11         Helpful.Friends    0.04279       0.00131 black
12    English.Difficulties    0.04279       0.00143 black
13           Close.Friends    0.04039       0.00088 black
14               Get.Along    0.03517       0.00120 black
15    Religious.Importance    0.03280       0.00103 black
16       Helpful.Community    0.03131       0.00130 black
17    Close.knit.Community    0.02980       0.00095 black
18         Community.Trust    0.02932       0.00113 black
19 Community.Shares.Values    0.02833       0.00076 black
20          Discrimination    0.02112       0.00116 black
21              Feel.Close    0.01956       0.00080 black
22          Similar.Values    0.01921       0.00071 black
23              Expression    0.01755       0.00067 black
24                  Gender    0.01604       0.00070 black
25     Spend.Time.Together    0.01602       0.00085 black
26            Togetherness    0.01579       0.00057 black
27                   Trust    0.01523       0.00060 black
28       Successful.Family    0.01447       0.00041 black
29            Family.Pride    0.01283       0.00052 black
30          Family.Respect    0.01271       0.00053 black
31           Income_median    0.01160       0.00049 black
32                 Loyalty    0.01095       0.00057 black
33        Dental.Insurance    0.01084       0.00055 black
34        Health.Insurance    0.00472       0.00026 black
# Bar chart of VSURF importance, bars ordered by importance, with error
# bars of +/- one standard deviation. Ethnicity is drawn in red.
importance_plot <- ggplot(
  vi,
  aes(x = reorder(Variable, Importance), y = Importance, fill = fill)
) +
  geom_col(alpha = 0.4) +
  geom_errorbar(
    aes(ymin = Importance - sd_Importance, ymax = Importance + sd_Importance)
  ) +
  labs(title = "Variable Importance", x = "Variable", y = "Importance") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  # Named values tie each colour to its `fill` level explicitly instead of
  # relying on the alphabetical order of the levels.
  scale_fill_manual(
    values = c(black = "black", red = "red"),
    guide = "none"
  )

print(importance_plot)

# Pass the plot explicitly so the saved file does not depend on whichever
# plot happened to be displayed last.
ggsave(
  filename = "VSURF_importance_Alt.png",
  plot = importance_plot,
  width = 12,
  height = 8,
  units = "in"
)

Logistic regression (Interpretation)

# Logistic regression on the original (non-resampled) rfdata, restricted to
# the outcome and the predictors VSURF kept at its prediction step.
lr <- rfdata |>
  select(
    Folkmedicine,
    all_of(names(rfdata)[-1][vsurf.mod$varselect.pred])
  )

lr_mod <- glm(Folkmedicine~.,family=binomial,data=lr)
summary(lr_mod)

Call:
glm(formula = Folkmedicine ~ ., family = binomial, data = lr)

Coefficients:
                                           Estimate Std. Error z value Pr(>|z|)
(Intercept)                               -3.016335   0.934940  -3.226  0.00125
EthnicityAsian Indian                     -0.030272   0.473625  -0.064  0.94904
EthnicityFilipino                         -0.635937   0.356061  -1.786  0.07409
EthnicityKorean                            0.141827   0.214351   0.662  0.50819
EthnicityOther                            -0.581345   0.395935  -1.468  0.14203
EthnicityVietnamese                       -1.220005   0.290934  -4.193 2.75e-05
Age                                        0.022399   0.004758   4.708 2.50e-06
See.Friends1                              -0.409254   0.513238  -0.797  0.42522
See.Friends2                               0.035686   0.468279   0.076  0.93926
See.Friends3                              -0.567571   0.466071  -1.218  0.22331
See.Friends4                              -0.049861   0.482450  -0.103  0.91769
See.Friends5                              -0.098293   0.493940  -0.199  0.84226
ReligionBuddhist                           0.347368   0.315124   1.102  0.27032
ReligionCatholic                          -0.033902   0.358719  -0.095  0.92471
ReligionHindu                             -0.465896   0.544998  -0.855  0.39263
ReligionMuslim                            -2.281513   1.126714  -2.025  0.04287
ReligionOther                              0.288843   0.619801   0.466  0.64120
ReligionProtestant                         0.044756   0.321309   0.139  0.88922
English.SpeakingNot well                  -0.235820   0.311594  -0.757  0.44916
English.SpeakingVery well                 -0.695889   0.349007  -1.994  0.04616
English.SpeakingWell                      -0.133941   0.314184  -0.426  0.66988
See.Family1                                0.454397   0.475916   0.955  0.33969
See.Family2                                0.340928   0.421214   0.809  0.41829
See.Family3                                0.679576   0.397195   1.711  0.08709
See.Family4                                0.779675   0.421481   1.850  0.06434
See.Family5                                0.523445   0.455067   1.150  0.25004
Helpful.Family1                           -0.046634   0.486931  -0.096  0.92370
Helpful.Family2                           -0.405006   0.473538  -0.855  0.39240
Helpful.Family3                           -0.694874   0.486922  -1.427  0.15356
Helpful.Family4                           -0.458485   0.527260  -0.870  0.38454
Helpful.Family5                           -0.574532   0.586975  -0.979  0.32768
Full.Time.EmploymentEmployed full time    -0.211554   0.154986  -1.365  0.17226
Close.Family1                              0.471797   0.402300   1.173  0.24090
Close.Family2                              0.437042   0.393382   1.111  0.26657
Close.Family3                              0.251498   0.408651   0.615  0.53827
Close.Family4                              0.271550   0.472489   0.575  0.56548
Close.Family5                              0.020205   0.584695   0.035  0.97243
Religious.AttendanceNever                  0.040029   0.313644   0.128  0.89844
Religious.AttendanceOnce or twice a month  0.258662   0.286676   0.902  0.36691
Religious.AttendanceSeldom                 0.228633   0.306964   0.745  0.45638
Helpful.Friends1                           0.786293   0.477640   1.646  0.09972
Helpful.Friends2                           0.690687   0.474400   1.456  0.14542
Helpful.Friends3                           0.450932   0.489155   0.922  0.35660
Helpful.Friends4                           0.388475   0.544055   0.714  0.47520
Helpful.Friends5                           0.396188   0.600617   0.660  0.50949
English.DifficultiesMuch                  -0.151615   0.243073  -0.624  0.53280
English.DifficultiesNot much              -0.114987   0.223525  -0.514  0.60696
English.DifficultiesVery much             -0.279831   0.241611  -1.158  0.24679
Close.Friends1                            -0.221262   0.410049  -0.540  0.58947
Close.Friends2                             0.137090   0.411928   0.333  0.73929
Close.Friends3                             0.401494   0.422132   0.951  0.34155
Close.Friends4                             0.388513   0.494921   0.785  0.43245
Close.Friends5                             0.349774   0.576811   0.606  0.54425
Get.AlongAgree                            -0.283180   0.514286  -0.551  0.58189
Get.AlongDisagree                         -0.229422   0.549949  -0.417  0.67656
Get.AlongNeutral                          -0.000129   0.510670   0.000  0.99980
Get.AlongStrongly agree                   -0.342164   0.557705  -0.614  0.53953
Religious.ImportanceNot very important     0.055863   0.300491   0.186  0.85252
Religious.ImportanceSomewhat important    -0.088913   0.340581  -0.261  0.79404
Religious.ImportanceVery important         0.157524   0.362294   0.435  0.66371
                                             
(Intercept)                               ** 
EthnicityAsian Indian                        
EthnicityFilipino                         .  
EthnicityKorean                              
EthnicityOther                               
EthnicityVietnamese                       ***
Age                                       ***
See.Friends1                                 
See.Friends2                                 
See.Friends3                                 
See.Friends4                                 
See.Friends5                                 
ReligionBuddhist                             
ReligionCatholic                             
ReligionHindu                                
ReligionMuslim                            *  
ReligionOther                                
ReligionProtestant                           
English.SpeakingNot well                     
English.SpeakingVery well                 *  
English.SpeakingWell                         
See.Family1                                  
See.Family2                                  
See.Family3                               .  
See.Family4                               .  
See.Family5                                  
Helpful.Family1                              
Helpful.Family2                              
Helpful.Family3                              
Helpful.Family4                              
Helpful.Family5                              
Full.Time.EmploymentEmployed full time       
Close.Family1                                
Close.Family2                                
Close.Family3                                
Close.Family4                                
Close.Family5                                
Religious.AttendanceNever                    
Religious.AttendanceOnce or twice a month    
Religious.AttendanceSeldom                   
Helpful.Friends1                          .  
Helpful.Friends2                             
Helpful.Friends3                             
Helpful.Friends4                             
Helpful.Friends5                             
English.DifficultiesMuch                     
English.DifficultiesNot much                 
English.DifficultiesVery much                
Close.Friends1                               
Close.Friends2                               
Close.Friends3                               
Close.Friends4                               
Close.Friends5                               
Get.AlongAgree                               
Get.AlongDisagree                            
Get.AlongNeutral                             
Get.AlongStrongly agree                      
Religious.ImportanceNot very important       
Religious.ImportanceSomewhat important       
Religious.ImportanceVery important           
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1505.5  on 1898  degrees of freedom
Residual deviance: 1337.0  on 1839  degrees of freedom
AIC: 1457

Number of Fisher Scoring iterations: 6
# Type II likelihood-ratio tests, one row per model term.
lr_mod |> car::Anova()
Analysis of Deviance Table (Type II tests)

Response: Folkmedicine
                     LR Chisq Df Pr(>Chisq)    
Ethnicity             27.3067  5  4.971e-05 ***
Age                   22.1591  1  2.510e-06 ***
See.Friends           10.3396  5    0.06617 .  
Religion              10.0898  6    0.12092    
English.Speaking       7.5875  3    0.05535 .  
See.Family             5.8019  5    0.32597    
Helpful.Family         5.8421  5    0.32189    
Full.Time.Employment   1.8742  1    0.17100    
Close.Family           2.5272  5    0.77239    
Religious.Attendance   1.3966  3    0.70633    
Helpful.Friends        3.9530  5    0.55621    
English.Difficulties   1.3945  3    0.70682    
Close.Friends          3.8251  5    0.57486    
Get.Along              3.6842  4    0.45043    
Religious.Importance   1.5161  3    0.67857    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Coefficient table with exponentiated estimates (odds ratios for this
# binomial model) and confidence intervals, shown as an interactive table.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
broom::tidy(lr_mod, exponentiate = TRUE, conf.int = TRUE) |>
  DT::datatable()